Install and Load Required Packages

required_packages <- c(
  "raster", "ncdf4", "sf", "ggplot2", "dplyr", "tmap",
  "spdep", "knitr", "forecast", "gridExtra")

for (pkg in required_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  suppressMessages(library(pkg, character.only = TRUE))
}

source("data/starima_package.R")

Data Preparation

# open the first .nc file to get the uk area bound box 
path <- 'data/tasmax_12km/tasmax_hadukgrid_uk_12km_mon_195301-195312.nc'
# open the NetCDF connection
nc <- nc_open(path)
# get the x,y bounds of data area
x_bnds <- ncvar_get(nc, 'projection_x_coordinate_bnds')
y_bnds <- ncvar_get(nc, 'projection_y_coordinate_bnds')
# close the NetCDF connection
nc_close(nc)
# create the sf data frame from polygons that are created by the bound box
# function of creation of polygon object from grid bounds
create_polygons_from_bounds <- function(x_bnds, y_bnds) {
  lapply(seq_len(dim(x_bnds)[2]), function(i) {
    lapply(seq_len(dim(y_bnds)[2]), function(j) {
      # Counterclockwise starting from the lower left corner
      polygon_coords <- matrix(c(x_bnds[1, i], y_bnds[1, j],
                                 x_bnds[1, i], y_bnds[2, j],
                                 x_bnds[2, i], y_bnds[2, j],
                                 x_bnds[2, i], y_bnds[1, j],
                                 x_bnds[1, i], y_bnds[1, j]),
                               byrow = TRUE, ncol = 2)
      st_polygon(list(polygon_coords))
    })
  }) |> unlist(recursive = FALSE)
}

# create the polygons
polygons <- create_polygons_from_bounds(x_bnds, y_bnds)
# combine the list of polygons into a single 'sfc' (simple feature collection) object
sfc_polygons <- st_sfc(polygons)
# set the CRS for the polygons (EPSG:27700) 
sfc_polygons <- st_set_crs(sfc_polygons, 27700)
# create an sf data frame
sf_df <- st_sf(geometry = sfc_polygons)
# write the tasmax values into the sf data frame, and create a matrix for following analysis
# directory where all the NetCDF files are located
file_dir <- "data/tasmax_12km"
file_names <- list.files(file_dir, pattern = "\\.nc$", full.names = TRUE)

# loop over each NetCDF file to construct the dataframe 
for (file_path in file_names) {
  # open the NetCDF file
  nc <- nc_open(file_path)
  # extract the year from the file name
  year <- sub(".*/tasmax_hadukgrid_uk_12km_mon_(\\d{4}).*.nc$", "\\1", file_path)
  # extract tasmax values for each month
  for (month in 1:12) {
    # construct the column name
    column_name <- paste(year, sprintf("%02d", month), sep = "-")
    # get the tasmax values for the current month
    tasmax_values <- ncvar_get(nc, 'tasmax', start = c(1, 1, month), count = c(-1, -1, 1))
    # number of rows (latitude/y) and columns (longitude/x)
    num_y <- dim(tasmax_values)[2]
    num_x <- dim(tasmax_values)[1]
    # initialize a vector to hold the reordered data
    reordered_tasmax_values <- vector("numeric", length = num_y * num_x)
    # loop through each grid cell in the order of the sf object and assign values from tasmax
    index <- 1
    for (x in 1:num_x) {
      for (y in 1:num_y) { 
        reordered_tasmax_values[index] <- tasmax_values[x, y]
        index <- index + 1
      }
    }
    # add it as a column to the sf object
    sf_df[[column_name]] <- reordered_tasmax_values
  }
  # close the NetCDF file
  nc_close(nc)
}

# remove rows with any NA values
sf_df_clean <- na.omit(sf_df)
# aggregate grid value to the England boroughs
Eng_boroughs <- st_read(dsn="data/England_borough.gpkg") %>% rename("geometry" = "geom")
## Reading layer `england_borough_simple' from data source 
##   `/Users/icarus/UCL/ucl_term2/CEGE0042/Assignment/coursework/data/England_borough.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 312 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 131961 ymin: 11183.87 xmax: 655980.8 ymax: 657600.4
## Projected CRS: OSGB36 / British National Grid
Eng_boroughs <- Eng_boroughs['NAME']
Eng_features <- st_read(dsn="data/England_feature.gpkg") %>% rename("geometry" = "geom")
## Reading layer `England_feature' from data source 
##   `/Users/icarus/UCL/ucl_term2/CEGE0042/Assignment/coursework/data/England_feature.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 312 features and 24 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 131961 ymin: 11183.87 xmax: 655980.8 ymax: 657600.4
## Projected CRS: OSGB36 / British National Grid
columns_to_aggregate <- names(sf_df_clean)[which(names(sf_df_clean) != "geometry")]
sf_aggregated <- Eng_boroughs %>%
  st_join(sf_df_clean) %>%
  filter(!if_any(all_of(columns_to_aggregate), is.na)) %>% # remove rows where any of the columns_to_aggregate is NA
  group_by(NAME,geometry) %>%
  summarize(
    across(all_of(columns_to_aggregate), mean, na.rm = TRUE),
    .groups = 'drop'
  )
sf_aggregated1 <- left_join(sf_aggregated,as.data.frame(Eng_features)[,c('NAME', 'Elvation', 'latitude','prop_manmade_area','prop_forest','prop_water')],by = "NAME")
# drop the geometry column and then convert the data to a matrix
sf_aggregated_matrix<-data.matrix(st_drop_geometry(sf_aggregated[,-1]))
rownames(sf_aggregated_matrix) <- st_drop_geometry(sf_aggregated)$NAME
t_sf_aggregated_matrix <- t(sf_aggregated_matrix) # STACF etc. needs time series in columns
# data visualization
brks=quantile(as.numeric(unlist(sf_aggregated_matrix)), seq(0,1,0.1)) 
# the max temperature of January 2022
tm_shape(sf_aggregated)+ 
  tm_fill("2022-01", style="fixed", palette="-Spectral",breaks=brks,
          showNA= FALSE)+
  # tm_borders("white")+
  tm_compass(position=c("left","top"))+
  tm_legend(position=c("left","bottom"))+
  tm_scale_bar(breaks = c(0, 25, 50, 100,200),text.size=2) # +district_line
# the max temperature of July 2022
tm_shape(sf_aggregated)+ 
  tm_fill("2022-07", style="fixed", palette="-Spectral",breaks=brks,
          showNA= FALSE)+
  # tm_borders("white")+
  tm_compass(position=c("left","top"))+
  tm_legend(position=c("left","bottom"))+
  tm_scale_bar(breaks = c(0, 25, 50, 100,200),text.size=2) # +district_line

Exploratory spatio-temporal data analysis

# Examining non spatio-temporal data characteristics
mu2 = mean(sf_aggregated_matrix)
mu2
## [1] 13.63693
sdev2 = sd(sf_aggregated_matrix)
sdev2
## [1] 5.489434
# Examining non spatio-temporal data characteristics
hist(sf_aggregated_matrix)
abline(v=mu2, col="red")

qqnorm(sf_aggregated_matrix)
qqline(sf_aggregated_matrix, col="red")

# Examining non spatio-temporal data characteristics
pairs(~Elvation+latitude+prop_forest+rowMeans(sf_aggregated_matrix),data=as.data.frame(sf_aggregated1),
      main="Simple Scatterplot Matrix",
      panel=function(x, y, ...) {
          points(x, y, cex=0.5, ...) 
      }
)

# Examining temporal characteristics
plot(colMeans(sf_aggregated_matrix), xlab = "Month", ylab = "tasmax", type="l", xaxt="n")
axis(1, at = seq(0, 720, 120), labels=seq(1953, 2013, 10))

# Create the heatmap,with latitude descending from top to bottom
sf_aggregated1 <- left_join(sf_aggregated,as.data.frame(Eng_features)[,c('NAME', 'Elvation', 'latitude','prop_manmade_area','prop_forest','prop_water')],by = "NAME")
sf_latitude_order <- sf_aggregated1 %>% arrange(prop_water)
sf_latitude_ordermatrix <- data.matrix(as.data.frame(sf_latitude_order)[,3:842]) 
rownames(sf_latitude_ordermatrix) <- sf_latitude_order$NAME

heatmap(sf_latitude_ordermatrix,Rowv=NA,Colv=NA, col=heat.colors(256),scale="none", margins=c(5,3),xlab="Month", cexCol=1.1,y.scale.components.subticks(n=10))

# Examining spatial autocorrelation
W <- nb2listw(poly2nb(sf_aggregated),zero.policy = TRUE)
# two ways to do the statistical test of moran's I
moran.test(x=rowMeans(sf_aggregated_matrix), listw=W)
## 
##  Moran I test under randomisation
## 
## data:  rowMeans(sf_aggregated_matrix)  
## weights: W    
## 
## Moran I statistic standard deviate = 25.002, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.920157307      -0.003246753       0.001364009
moran.mc(x=rowMeans(sf_aggregated_matrix), listw=W, nsim=9999)
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  rowMeans(sf_aggregated_matrix) 
## weights: W  
## number of simulations + 1: 10000 
## 
## statistic = 0.92016, observed rank = 10000, p-value = 1e-04
## alternative hypothesis: greater
# Examining spatial autocorrelation
# local moran' I
Ii <- localmoran(x=rowMeans(sf_aggregated_matrix), listw=W)
eng_tasmax_sf <- sf_aggregated[,2]
eng_tasmax_sf$avg_tasmax <- as.data.frame(rowMeans(sf_aggregated_matrix))[,1]
eng_tasmax_sf$Ii <- Ii[,'Ii']
tm_shape(eng_tasmax_sf) +
  tm_borders(lwd = 0.1)+ 
  tm_polygons(col="Ii", palette="-RdBu", style="quantile",size= 0)

# unadjusted p-values
eng_tasmax_sf$Iip_unadjusted <- Ii[,"Pr(z != E(Ii))"]
eng_tasmax_sf$Ii_un_sig <- "nonsignificant"
eng_tasmax_sf$Ii_un_sig[which(eng_tasmax_sf$Iip_unadjusted < 0.05)] <- "significant"
# statistically significant units with the unadjusted p-value
# tm_shape(eng_tasmax_sf) +
#   tm_borders(lwd = 0.1)+ 
#   tm_polygons(col="Ii_un_sig", palette="-RdBu")

# apply the Bonferroni adjustment to the p-values directly using the  p.adjust
eng_tasmax_sf$Iip_adjusted <- p.adjust(eng_tasmax_sf$Iip_unadjusted, method="bonferroni")  
eng_tasmax_sf$Ii_ad_sig <- "nonsignificant"
eng_tasmax_sf$Ii_ad_sig[which(eng_tasmax_sf$Iip_adjusted < 0.05)] <- "significant"
# tm_shape(eng_tasmax_sf) +
#   tm_borders(lwd = 0)+ 
#   tm_polygons(col="Ii_ad_sig", palette="-RdBu")

# steps to get the clusters of high value and low value
moranCluster <- function(shape, W, var, alpha=0.05, p.adjust.method="bonferroni")  
{
  # Code adapted from https://rpubs.com/Hailstone/346625
  Ii <- localmoran(shape[[var]], W)
  shape$Ii <- Ii[,"Ii"]
  Iip <- p.adjust(Ii[,"Pr(z != E(Ii))"], method=p.adjust.method)
  shape$Iip <- Iip
  shape$sig <- shape$Iip<alpha
  # Scale the data to obtain low and high values
  shape$scaled <- scale(shape[[var]]) # high low values at location i
  shape$lag_scaled <- lag.listw(W, shape$scaled) # high low values at neighbours j
  shape$lag_cat <- factor(ifelse(shape$scaled>0 & shape$lag_scaled>0, "HH",
                                 ifelse(shape$scaled>0 & shape$lag_scaled<0, "HL",
                                        ifelse(shape$scaled<0 & shape$lag_scaled<0, "LL",
                                               ifelse(shape$scaled<0 & shape$lag_scaled<0, "LH", "Equivalent")))))
  shape$sig_cluster <- as.character(shape$lag_cat)
  shape$sig_cluster[!shape$sig] <- "Non-sig"
  shape$sig_cluster <- as.factor(shape$sig_cluster)
  results <- data.frame(Ii=shape$Ii, pvalue=shape$Iip, type=shape$lag_cat, sig=shape$sig_cluster)
  
  return(list(results=results))
}
clusters <- moranCluster(eng_tasmax_sf, W=W, var="avg_tasmax")$results
eng_tasmax_sf$Ii_cluster <- clusters$sig

tm_shape(eng_tasmax_sf) + tm_borders(lwd = 0)+tm_polygons(col="Ii_cluster")

Methodology and results

Time series decomposition

# Decomposition for City of London time series
ts_london <- ts(t_sf_aggregated_matrix[,'City and County of the City of London'], start=c(1953-01, 1), frequency=12)

decom_london <- stats::decompose(ts_london)
autoplot(decom_london)

S-ARIMA Models

Autocorrelation and partial autocorrelation analysis

# autocorrelation of a specific borough(London), max temperature series for the first 48 lags
acf(sf_aggregated_matrix['City and County of the City of London',], lag.max=48, xlab="Lag", ylab="ACF", main="Autocorrelation plot of monthly maximum air temperatures")
pacf(sf_aggregated_matrix['City and County of the City of London',], lag.max=48, xlab="Lag", ylab="PACF",main="Partial Autocorrelation plot") 

T.s.diff <- diff(sf_aggregated_matrix['City and County of the City of London',], lag=12, differences=1)

acf(T.s.diff, lag.max=48, xlab="Lag", ylab="ACF", main="Differenced Autocorrelation plot") 
# q candidates:[1,2,3/1,2,3,4,5]
pacf(T.s.diff, lag.max=48, xlab="Lag", ylab="PACF",main="Differenced Partial Autocorrelation plot") 
# p candidates:[1/1,2]

Parameter estimation and fitting

# ARIMA(1,0,3)(2,1,5)12-----initial combination of parameters
fit.ar <- arima(sf_aggregated_matrix['City and County of the City of London',1:780],order=c(1,0,3),seasonal=list(order=c(2,1,5),period=12))
fit.ar  # aic = 2914.9
## 
## Call:
## arima(x = sf_aggregated_matrix["City and County of the City of London", 1:780], 
##     order = c(1, 0, 3), seasonal = list(order = c(2, 1, 5), period = 12))
## 
## Coefficients:
##          ar1      ma1      ma2      ma3    sar1     sar2     sma1    sma2
##       0.9989  -0.7475  -0.1611  -0.0637  0.9209  -0.8034  -1.9012  1.6673
## s.e.  0.0030   0.0363   0.0476   0.0368  0.1104   0.1626   0.1174  0.2457
##          sma3     sma4    sma5
##       -0.7369  -0.0875  0.0768
## s.e.   0.1794   0.0858  0.0419
## 
## sigma^2 estimated as 2.403:  log likelihood = -1445.45,  aic = 2914.9
NRMSE_fit <- NRMSE(res=fit.ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',1:780])
NRMSE_fit # 0.2700663
## [1] 0.2700663
# several try came across different directions
# ARIMA(1,0,3)(0,1,5)12
fit.ar <- arima(sf_aggregated_matrix['City and County of the City of London',1:780],order=c(1,0,3),seasonal=list(order=c(0,1,5),period=12))
fit.ar  # aic = 2914.49
## 
## Call:
## arima(x = sf_aggregated_matrix["City and County of the City of London", 1:780], 
##     order = c(1, 0, 3), seasonal = list(order = c(0, 1, 5), period = 12))
## 
## Coefficients:
##          ar1      ma1      ma2      ma3     sma1     sma2    sma3     sma4
##       0.9987  -0.7483  -0.1609  -0.0633  -0.9733  -0.0425  0.0088  -0.0543
## s.e.  0.0031   0.0362   0.0475   0.0364   0.0390   0.0499  0.0503   0.0509
##         sma5
##       0.0868
## s.e.  0.0351
## 
## sigma^2 estimated as 2.421:  log likelihood = -1447.24,  aic = 2914.49
NRMSE_fit <- NRMSE(res=fit.ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',1:780])
NRMSE_fit # 0.2710715
## [1] 0.2710715
# ARIMA (1,0,4) (2,1,5)12
fit.ar <- arima(sf_aggregated_matrix['City and County of the City of London',1:780],order=c(1,0,4),seasonal=list(order=c(2,1,5),period=12))
fit.ar  # aic = 2913.22
## 
## Call:
## arima(x = sf_aggregated_matrix["City and County of the City of London", 1:780], 
##     order = c(1, 0, 4), seasonal = list(order = c(2, 1, 5), period = 12))
## 
## Coefficients:
##          ar1      ma1      ma2      ma3      ma4    sar1     sar2     sma1
##       0.9988  -0.7510  -0.1346  -0.0232  -0.0639  1.5263  -0.8550  -2.5096
## s.e.  0.0030   0.0365   0.0460   0.0439   0.0362  0.0888   0.0849   0.0999
##         sma2     sma3    sma4     sma5
##       2.3163  -0.8060  0.0313  -0.0262
## s.e.  0.1892   0.1488  0.1107   0.0448
## 
## sigma^2 estimated as 2.387:  log likelihood = -1443.61,  aic = 2913.22
NRMSE_fit <- NRMSE(res=fit.ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',1:780])
NRMSE_fit # 0.2691755
## [1] 0.2691755
# ARIMA(1,0,3) (2,1,6)12----the selected combination 
fit.ar <- arima(sf_aggregated_matrix['City and County of the City of London',1:780],order=c(1,0,3),seasonal=list(order=c(2,1,6),period=12))
fit.ar  # aic = 2910.26
## 
## Call:
## arima(x = sf_aggregated_matrix["City and County of the City of London", 1:780], 
##     order = c(1, 0, 3), seasonal = list(order = c(2, 1, 6), period = 12))
## 
## Coefficients:
##          ar1      ma1      ma2      ma3     sar1     sar2   sma1     sma2
##       0.9992  -0.7507  -0.1619  -0.0603  -1.7140  -0.7717  0.756  -0.9537
## s.e.  0.0028   0.0362   0.0477   0.0369   0.1017   0.1027  0.111   0.0592
##          sma3     sma4    sma5    sma6
##       -0.8414  -0.0597  0.0482  0.1161
## s.e.   0.1275   0.0584  0.0512  0.0432
## 
## sigma^2 estimated as 2.363:  log likelihood = -1442.13,  aic = 2910.26
NRMSE_fit <- NRMSE(res=fit.ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',1:780])
NRMSE_fit # 0.2678122
## [1] 0.2678122
# # ADDITIONAL TRY-----------
# # ARI(1,0,0)(2,1,0)12
# fit.ar <- arima(sf_aggregated_matrix['City and County of the City of London',1:780],order=c(1,0,0),seasonal=list(order=c(2,1,0),period=12))
# fit.ar  # aic = 3099.05
# NRMSE_fit <- NRMSE(res=fit.ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',1:780])
# NRMSE_fit # 0.3142105
# 
# # IMA(0,0,3)(0,1,5)12
# fit.ar <- arima(sf_aggregated_matrix['City and County of the City of London',1:780],order=c(0,0,3),seasonal=list(order=c(0,1,5),period=12))
# fit.ar  # aic = 2926.33
# NRMSE_fit <- NRMSE(res=fit.ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',1:780])
# NRMSE_fit # 0.2756825
# 
# # ARIMA(1,0,4)(2,1,1)12
# fit.ar <- arima(sf_aggregated_matrix['City and County of the City of London',1:780],order=c(1,0,4),seasonal=list(order=c(2,1,1),period=12))
# fit.ar  # aic = 2916.47
# NRMSE_fit <- NRMSE(res=fit.ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',1:780])
# NRMSE_fit # 0.2701743
# 
# # ARIMA(1,0,4)(4,1,1)12
# fit.ar <- arima(sf_aggregated_matrix['City and County of the City of London',1:780],order=c(1,0,4),seasonal=list(order=c(4,1,1),period=12))
# fit.ar  # aic = 2913
# NRMSE_fit <- NRMSE(res=fit.ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',1:780])
# NRMSE_fit # 0.2703979

Diagnostic Checking

tsdiag(fit.ar)

Prediction

fit.Ar <- Arima(sf_aggregated_matrix['City and County of the City of London',1:780],order=c(1,0,3),seasonal=list(order=c(2,1,6),period=12))
pre.Ar <- Arima(sf_aggregated_matrix['City and County of the City of London',781:840], model=fit.Ar)
matplot(cbind(pre.Ar$fitted, pre.Ar$x), type="l")

NRMSE_fit1 <- NRMSE(res=pre.Ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',781:840])
NRMSE_fit1 # 0.2250566
## [1] 0.2250566
# Fitting the model using auto.arima
fit.auto.ar <- auto.arima(sf_aggregated_matrix['City and County of the City of London',1:780])
# fit.auto.ar # aic = 3244.75
NRMSE_fit2 <- NRMSE(res=fit.auto.ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',1:780])
NRMSE_fit2 # 0.3356572
## [1] 0.3356572
fit.auto.Ar <- Arima(sf_aggregated_matrix['City and County of the City of London',1:780],order=c(5,0,1))
pre.auto.Ar <- Arima(sf_aggregated_matrix['City and County of the City of London',781:840], model=fit.auto.Ar)
NRMSE_fit22 <- NRMSE(res=pre.auto.Ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',781:840])
NRMSE_fit22 # 0.3643618
## [1] 0.3643618
matplot(cbind(pre.auto.Ar$fitted, pre.auto.Ar$x), type="l")

STARIMA Models

Weight Matrix Definition

nbrs <- poly2nb(sf_aggregated)
W1 <- nb2mat(nbrs)
Wlist <- nblag(nbrs, 3)
W2 <- nb2mat(Wlist[[2]])
W0 <- diag(x=1, nrow(W1), ncol(W1))

Spatiotemporal autocorrelation analysis

t_sf_aggregated_matrix.diff <- diff(t_sf_aggregated_matrix,lag=12,differences= 1)
# stacf(t_sf_aggregated_matrix.diff, W0, 120)
# ACF
stacf(t_sf_aggregated_matrix, W1, 60)
## $stacf
##                [,1]
##  [1,]  0.9999290777
##  [2,]  0.8169726479
##  [3,]  0.4687174737
##  [4,]  0.0110568493
##  [5,] -0.4446868245
##  [6,] -0.7686059935
##  [7,] -0.8837675556
##  [8,] -0.7643580206
##  [9,] -0.4393905430
## [10,]  0.0112265452
## [11,]  0.4646421308
## [12,]  0.8004143850
## [13,]  0.9240301916
## [14,]  0.7997389802
## [15,]  0.4565081518
## [16,] -0.0022004818
## [17,] -0.4472335567
## [18,] -0.7702766309
## [19,] -0.8882387225
## [20,] -0.7654490940
## [21,] -0.4455266179
## [22,]  0.0063584292
## [23,]  0.4615246107
## [24,]  0.7965523704
## [25,]  0.9211665281
## [26,]  0.7996993195
## [27,]  0.4598058376
## [28,]  0.0085338856
## [29,] -0.4377617504
## [30,] -0.7656404490
## [31,] -0.8851801011
## [32,] -0.7666456501
## [33,] -0.4448661219
## [34,]  0.0078345007
## [35,]  0.4640311227
## [36,]  0.7966409156
## [37,]  0.9182925546
## [38,]  0.7905333223
## [39,]  0.4544180378
## [40,]  0.0018943912
## [41,] -0.4517015912
## [42,] -0.7750635707
## [43,] -0.8942210552
## [44,] -0.7729210599
## [45,] -0.4364447751
## [46,]  0.0097535012
## [47,]  0.4628237817
## [48,]  0.7998780818
## [49,]  0.9158395831
## [50,]  0.7932724486
## [51,]  0.4549562664
## [52,] -0.0005737165
## [53,] -0.4510442644
## [54,] -0.7797966651
## [55,] -0.8937585343
## [56,] -0.7681247032
## [57,] -0.4379995833
## [58,]  0.0176411661
## [59,]  0.4720303111
## [60,]  0.8002646188
## [61,]  0.9221265476
## 
## $ubound
## [1] 0.004073835
## 
## $lbound
## [1] -0.004073835
# ACF-diff
stacf(t_sf_aggregated_matrix.diff, W1, 60)
## $stacf
##                [,1]
##  [1,]  9.995814e-01
##  [2,]  2.286773e-01
##  [3,]  1.119496e-01
##  [4,]  8.697510e-02
##  [5,] -5.807124e-03
##  [6,] -2.342842e-02
##  [7,]  2.099066e-02
##  [8,]  2.585720e-02
##  [9,]  5.370462e-02
## [10,]  2.308844e-02
## [11,] -9.384988e-03
## [12,] -8.033242e-02
## [13,] -4.733424e-01
## [14,] -1.058099e-01
## [15,] -8.687575e-02
## [16,] -1.453961e-01
## [17,] -7.815240e-02
## [18,] -2.980636e-02
## [19,] -3.802736e-02
## [20,]  1.169057e-02
## [21,] -2.333079e-02
## [22,] -1.986062e-02
## [23,] -3.961391e-02
## [24,] -4.674311e-02
## [25,] -2.194464e-02
## [26,]  3.822934e-02
## [27,]  2.030813e-02
## [28,]  9.281782e-02
## [29,]  1.369636e-01
## [30,]  8.868310e-02
## [31,]  8.727283e-02
## [32,]  3.883168e-02
## [33,] -6.075257e-02
## [34,] -1.692273e-02
## [35,]  2.884737e-02
## [36,]  2.665530e-03
## [37,]  9.723611e-03
## [38,] -6.362536e-02
## [39,] -1.363086e-03
## [40,]  1.385593e-05
## [41,] -8.311346e-02
## [42,] -5.186458e-02
## [43,] -9.719636e-02
## [44,] -1.077519e-01
## [45,]  4.766322e-02
## [46,] -5.899173e-02
## [47,] -9.184349e-02
## [48,] -7.325033e-03
## [49,] -6.413842e-02
## [50,]  1.849988e-02
## [51,]  6.412118e-03
## [52,] -1.873746e-02
## [53,]  5.151089e-02
## [54,] -9.082154e-03
## [55,]  1.483744e-02
## [56,]  4.546322e-02
## [57,]  1.291029e-02
## [58,]  1.069642e-01
## [59,]  1.298391e-01
## [60,]  3.126883e-02
## [61,]  5.445497e-02
## 
## $ubound
## [1] 0.004105539
## 
## $lbound
## [1] -0.004105539
# PACF
stpacf(t_sf_aggregated_matrix, W1, 60)
## $stpacf
##                [,1]
##  [1,] -3.849070e-04
##  [2,] -2.136039e-03
##  [3,]  1.261786e-03
##  [4,]  2.212410e-03
##  [5,]  2.825496e-03
##  [6,]  1.971271e-03
##  [7,]  8.584163e-04
##  [8,]  7.076142e-04
##  [9,]  1.564163e-03
## [10,]  9.049861e-04
## [11,] -2.775790e-04
## [12,] -1.045866e-03
## [13,]  1.549707e-03
## [14,] -6.536937e-05
## [15,] -2.384886e-04
## [16,]  4.234760e-04
## [17,]  5.853537e-04
## [18,] -2.045278e-04
## [19,]  2.516246e-04
## [20,] -8.373397e-04
## [21,]  1.302730e-03
## [22,]  6.291935e-04
## [23,]  3.210108e-04
## [24,] -1.387848e-03
## [25,]  1.873651e-03
## [26,]  2.818730e-04
## [27,]  1.430196e-03
## [28,]  9.187508e-06
## [29,]  2.755650e-04
## [30,] -2.543481e-04
## [31,]  2.687892e-04
## [32,] -8.208603e-04
## [33,]  1.503462e-03
## [34,]  8.018407e-04
## [35,] -5.580112e-04
## [36,] -1.732145e-03
## [37,]  4.459371e-04
## [38,]  1.643785e-04
## [39,]  3.661412e-04
## [40,] -2.239342e-03
## [41,] -4.724246e-05
## [42,]  8.545967e-05
## [43,]  7.603769e-04
## [44,]  7.329521e-04
## [45,]  5.910613e-04
## [46,]  1.935461e-04
## [47,]  5.586757e-04
## [48,] -1.521238e-03
## [49,]  1.715927e-03
## [50,] -1.150768e-04
## [51,]  5.161281e-04
## [52,] -9.078895e-04
## [53,]  2.505487e-04
## [54,] -4.533890e-04
## [55,]  9.376680e-04
## [56,] -3.774466e-04
## [57,]  1.089926e-03
## [58,]  8.336141e-04
## [59,]  1.279946e-03
## [60,] -1.470980e-03
## 
## $ubound
## [1] 0.06904767
## 
## $lbound
## [1] -0.06904767
# PACF-diff
stpacf(t_sf_aggregated_matrix.diff,W1,60)
## $stpacf
##                [,1]
##  [1,] -3.091437e-04
##  [2,]  1.281623e-04
##  [3,]  9.132655e-05
##  [4,] -1.091403e-03
##  [5,] -7.104038e-04
##  [6,]  7.392717e-04
##  [7,] -1.452191e-04
##  [8,] -4.702894e-06
##  [9,] -2.786141e-04
## [10,]  5.588406e-05
## [11,] -1.136630e-03
## [12,]  2.394945e-04
## [13,] -2.236226e-05
## [14,] -4.846903e-04
## [15,] -1.624324e-03
## [16,] -6.809392e-04
## [17,] -2.758485e-04
## [18,]  4.754280e-04
## [19,]  4.731520e-05
## [20,] -4.663121e-04
## [21,] -4.824278e-05
## [22,] -3.629338e-04
## [23,] -2.442398e-04
## [24,]  3.982024e-04
## [25,]  1.175675e-03
## [26,] -4.400386e-04
## [27,] -3.114557e-04
## [28,]  1.291379e-03
## [29,]  2.866187e-04
## [30,]  6.791798e-04
## [31,] -3.618677e-04
## [32,] -1.887685e-03
## [33,]  2.988513e-04
## [34,]  1.821066e-04
## [35,] -9.750323e-04
## [36,]  2.871136e-04
## [37,] -4.238351e-04
## [38,] -1.533457e-04
## [39,]  2.148763e-05
## [40,] -1.741711e-04
## [41,] -8.865834e-05
## [42,]  6.955403e-04
## [43,] -4.321576e-04
## [44,] -3.573184e-04
## [45,] -2.798004e-04
## [46,] -6.596482e-04
## [47,] -1.322149e-03
## [48,] -5.775746e-05
## [49,]  8.162509e-04
## [50,] -2.887874e-05
## [51,]  1.016208e-03
## [52,]  6.106386e-04
## [53,] -7.002192e-04
## [54,]  1.798726e-04
## [55,] -4.899561e-04
## [56,]  4.153639e-04
## [57,]  2.340743e-04
## [58,] -1.417819e-03
## [59,] -9.156211e-04
## [60,] -5.408306e-04
## 
## $ubound
## [1] 0.06954681
## 
## $lbound
## [1] -0.06954681

Parameter estimation and fitting

W_fit<-list(w1=W1) # Create a list of spatial weight matrices, zero not needed
# W_fit$w2 <- W2
fit.star101 <- starima_fit(Z=t_sf_aggregated_matrix[1:780,],W=W_fit,p=1,d=12,q=1)  
fit.star102 <- starima_fit(Z=t_sf_aggregated_matrix[1:780,],W=W_fit,p=1,d=12,q=2)  
fit.star103 <- starima_fit(Z=t_sf_aggregated_matrix[1:780,],W=W_fit,p=1,d=12,q=3)  
fit.star001 <- starima_fit(Z=t_sf_aggregated_matrix[1:780,],W=W_fit,p=0,d=12,q=1)  
fit.star100 <- starima_fit(Z=t_sf_aggregated_matrix[1:780,],W=W_fit,p=1,d=12,q=0)
fit.star101$NRMSE[56] # 0.3812036
## [1] 0.3812036
fit.star102$NRMSE[56] # 0.3814073
## [1] 0.3814073
fit.star103$NRMSE[56] # 0.3814507
## [1] 0.3814507
fit.star001$NRMSE[56] # 0.3912485
## [1] 0.3912485
fit.star100$NRMSE[56] # 0.3806741 the selected one
## [1] 0.3806741

Diagnostic Checking

fit.star100$NRMSE[56]
## [1] 0.3806741
stacf(fit.star100$RES,W1,48)
## $stacf
##               [,1]
##  [1,]  0.999557362
##  [2,] -0.017462444
##  [3,]  0.063561127
##  [4,]  0.059336483
##  [5,] -0.022228503
##  [6,] -0.023005605
##  [7,]  0.018907577
##  [8,] -0.004452851
##  [9,]  0.060709005
## [10,]  0.030220758
## [11,] -0.004711100
## [12,]  0.049413949
## [13,] -0.470303979
## [14,]  0.004562637
## [15,] -0.054697701
## [16,] -0.129042109
## [17,] -0.055298692
## [18,] -0.004984029
## [19,] -0.034223244
## [20,]  0.046242887
## [21,] -0.039347854
## [22,] -0.025301238
## [23,] -0.027501872
## [24,] -0.058370832
## [25,] -0.033533346
## [26,]  0.063029396
## [27,]  0.010009803
## [28,]  0.087542898
## [29,]  0.120555072
## [30,]  0.043103752
## [31,]  0.069810373
## [32,]  0.019673195
## [33,] -0.058716917
## [34,]  0.002415902
## [35,]  0.034191559
## [36,] -0.007401483
## [37,]  0.023994561
## [38,] -0.084401384
## [39,] -0.004146078
## [40,]  0.017040948
## [41,] -0.080377359
## [42,] -0.009917727
## [43,] -0.073756230
## [44,] -0.110362638
## [45,]  0.069381836
## [46,] -0.055597180
## [47,] -0.079457702
## [48,]  0.041195421
## [49,] -0.066533418
## 
## $ubound
## [1] 0.00424313
## 
## $lbound
## [1] -0.00424313
hist(fit.star100$RES[,56])
Box.test(fit.star100$RES[,56],lag=1, type="Ljung") # p= 0.7233
## 
##  Box-Ljung test
## 
## data:  fit.star100$RES[, 56]
## X-squared = 0.12535, df = 1, p-value = 0.7233

Forecasting

pre.star <- starima_pre(t_sf_aggregated_matrix[(780-12-1+1):840,],model=fit.star100)
matplot(1:60,cbind(t_sf_aggregated_matrix[781:840,56],pre.star$PRE[,56]),type="l")

# show the forecast accuracy of the STARIMA model vary across the study area
df <- as.data.frame(pre.star$PRE['2021-07',])
df$OBS <- as.vector(pre.star$OBS['2021-07',])
colnames(df)[1:2] <- c("PRE", "OBS")
df <- df %>%
  mutate(differ = PRE - OBS)
df$NAME <- rownames(df)
sf_aggregated2 <- sf_aggregated1[,1]
sf_aggregated2 <- sf_aggregated2 %>%
  left_join(df, by = "NAME")
ggplot(sf_aggregated2) +
  geom_sf(aes(fill = differ)) +
  scale_fill_viridis_c() +
  theme_minimal() +
  labs(fill = "Difference")